* Kristoffer Balle Hvidberg
* Field of Study and Financial Problems: How Economics Reduces the Risk of Default

********************************************************************************

clear all
set more off

global data "K:\Workdata\704365\kbh\FSFP_Replication_Package\data"
global temp "K:\Workdata\704365\kbh\FSFP_Replication_Package\data\temp"
global raw  "K:\Workdata\704365\kbh\FSFP_Replication_Package\data\raw"

********************************************************************************

* Check register data for duplicate observations and merge the relevant variables

use "$raw\bef.dta", clear // Accessed 2020-01-24
	sort familie_id
	merge m:1 familie_id year using "$raw\fam.dta" // Accessed 2020-02-11
	drop if _merge==2
	drop _merge
	sort pnr year
	duplicates report pnr year
	keep pnr alder civ_vfra civst efalle far_id foed_dag ie_type koen kom mor_id befupdsourceyear year antboernf famsourceyear
	compress
save "$raw\bef_edit.dta"

use "$raw\bef_fam.dta", clear // Accessed 2020-01-24
	duplicates report pnr year
	keep pnr alder foed_dag befupdsourceyear year
	compress
save "$raw\bef_fam_edit.dta"

use "$raw\ind.dta", clear // Accessed 2020-01-25
	local vars = "arblhu arblhumv atpsaml bankakt bankgaeld dispon_13 ejendomsvurdering erhvervsindk_13 folkefortid_13 fosfufrd gaeld_til_off honny indestpi jubjub kapindkp kapitialt kapitpriv kapitvirk kiskat koejd kontanthj_13 kursakt loenmv_13 netovskud_13 oblakt oblgaeld off_overforsel_13 offpens offpens_efterlon_13 pantakt pantgaeld perindkialt_13 peroevrigformue_13 qaktie qaktivf qaktivf_ny05 qarblos qbistyd qpassiv qpassivn"
	keep pnr `vars' indupd01sourceyear year indupdsourceyear
	drop indupdsourceyear
	replace year=indupd01sourceyear
	foreach x in `vars' {
		replace `x' = round(`x')
	}
	duplicates drop
	duplicates tag pnr year, gen(dup)
	drop if dup>0
	drop dup
	duplicates report pnr year
save "$raw\ind_edit.dta"

use "$raw\ind_fam.dta", clear // Accessed 2020-01-26
	local vars = "dispon_13 erhvervsindk_13 perindkialt_13"
	keep pnr `vars' indupd01sourceyear year indupdsourceyear
	drop indupdsourceyear
	replace year=indupd01sourceyear
	foreach x in `vars' {
		replace `x' = round(`x')
	}
	duplicates drop
	duplicates tag pnr year, gen(dup)
	drop if dup>0
	drop dup
	duplicates report pnr year
save "$raw\ind_fam_edit.dta"
	
use "$raw\idap.dta", clear // Accessed 2020-01-24
	drop if mi(year)
	duplicates report pnr year 
	keep pnr arledgr arledgr_brutto arledgr_netto erhver pstill psoc_status_kode idapupdsourceyear year
	compress
save "$raw\idap_edit.dta"

use "$raw\idan.dta", clear // Accessed 2020-01-24
	set seed 1702
	gen random = runiform()
	drop if mi(year)
	drop ansxfrem ansxtilb ansaar arbgnr arbled arbnr arbstk atp_beloeb bredt_loen_beloeb brutto_gradaar cvrnr jobatp joblon kom lbnr lonmfrem lonmtilb netto_gradaar smal_timeloen smalt_loen_beloeb
	drop if mi(arb_hoved_bra_db07) & mi(arb_sektorkode) & mi(disco_kode) & mi(persbrc) & mi(persbrc07) & mi(sektor)
	replace ansdage_2008 = ansdage if mi(ansdage_2008)
	drop ansdage
	bysort pnr year: egen max_ans = max(ansdage_2008)
	drop if ansdage_2008!=max_ans // Keep affiliation with longest employment spell
	duplicates tag pnr year, gen(dup)
	sort pnr year jobkat random // Keep observation with lowest job category, e.g. 1 indicates full time
	count if dup>0 & pnr==pnr[_n-1] & !mi(jobkat[_n-1])
	local i = r(N)
	if i>0 {
		drop if dup>0 & pnr==pnr[_n-1] & !mi(jobkat[_n-1])
		count if dup>0 & pnr==pnr[_n-1] & !mi(jobkat[_n-1])
		local i = r(N)
	}
	drop dup
	duplicates drop
	duplicates tag pnr year, gen(dup)
	drop if dup>0
	drop dup
	duplicates report pnr year
	drop random
	compress
save "$raw\idan_edit.dta"

use "$raw\indh.dta", clear // Accessed 2020-01-24
	keep qpripen qpripenk qpripenl qarbpenk qarbpenl pnr year indhsourceyear
	duplicates report pnr year
	compress
save "$raw\indh_edit.dta"

use "$raw\udda.dta", clear // Accessed 2020-01-24
	duplicates report pnr year
	compress
save "$raw\udda_edit.dta"

use "$raw\udda_fam.dta", clear // Accessed 2020-01-24
	duplicates report pnr year
	compress
save "$raw\udda_fam_edit.dta"

use "$raw\udg.dta", clear // Accessed 2020-01-24
	gen slutaar=year(karakter_udd_vtil)
	drop if mi(slutaar)
	tostring audd, replace format(%04.0f)
	sort pnr slutaar
	duplicates tag pnr slutaar, gen(dup)
	tab dup // Few cases of duplicate observations. Often same GPA. If not, keep highest GPA. 
	gsort pnr slutaar - karakter_udd audd
	drop if pnr==pnr[_n-1] & slutaar==slutaar[_n-1] & dup>0
	duplicates report pnr slutaar
	drop dup
	* Create a "panel" with GPA's to merge other data sets
	expand 24
	drop year
	sort pnr slutaar audd
	gen year=.
	replace year=1993 if pnr!=pnr[_n-1] | (pnr==pnr[_n-1] & slutaar!=slutaar[_n-1]) |  (pnr==pnr[_n-1] & slutaar==slutaar[_n-1]) & audd!=audd[_n-1]
	replace year=year[_n-1]+1 if mi(year)
	drop if slutaar>year
	sort pnr year slutaar
	drop if pnr==pnr[_n+1] & year==year[_n+1]
	duplicates report pnr year
	sort pnr year
	drop karakter_udd_vtil cprtjek 
	label var slutaar "Year of completion"
	label var year "Year"
	compress
save "$raw\udg_edit.dta"

use "$raw\pensform.dta", clear // Accessed 2020-01-24
	gen year=pensformsourceyear
	sort pnr kontraktnr year
	keep pnr kontraktnr year pensdepotblb andakkrgublb andkolbonusblb andsaebonusblb
	drop if mi(pnr)
	drop if mi(year)
	collapse (sum) pensdepotblb (last) andakkrgublb (last) andkolbonusblb (last) andsaebonusblb, by(pnr kontraktnr year)
	egen pensionsdepot = rowtotal(pensdepotblb andakkrgublb andkolbonusblb andsaebonusblb)
	drop pensdepotblb andakkrgublb andkolbonusblb andsaebonusblb kontraktnr
	collapse pensionsdepot, by(pnr year)
	duplicates report pnr year
	label var pensionsdepot "Retirement savings"
	compress
save "$raw\pensform_edit"

use "$raw\urte.dta", clear // Accessed 2020-01-24
	drop if ejerstatu_kod=="2" // Drop associations, companies, foudations...
	drop if mi(kto_nr) // Drop if account number is missing
	gen restance_temp=(rest_mark_kod==2) if !mi(rest_mark_kod) // Generate indicator of default and delinquency
	bysort pnr kto_nr indk_aar: egen restance=max(restance_temp)
	drop restance_temp 
	duplicates report pnr kto_nr indk_aar registmp rette_kod rest_gaeld_blb rnt_blb
	sort pnr kto_nr indk_aar registmp rette_kod rest_gaeld_blb rnt_blb op_se_nr ejerstatu_kod ibert_se_nr ktotype_kod rest_mark_kod, stable
	drop bankrgist_nr ibert_se_nr op_se_nr kurs_vaerdi_blb  rest_lb_aar_ant rnt_fod_pct 	
	collapse (last) ejerstatu_kod (last) ktotype_kod (sum) rest_gaeld_blb (last) rest_mark_kod (sum) rnt_blb (last) restance, by(pnr kto_nr indk_aar) fast
	duplicates report pnr kto_nr indk_aar
	bysort pnr indk_aar: egen rest_all = max(restance) // Indicator for default and delinquency on at least one loan in a given year
	label var rest_all "All defaults, incl. before collapse of accounts"
	levelsof ktotype_kod, local(type)
	foreach x in `type' {
		gen 	rest`x'_temp=(rest_mark_kod==2) if !mi(rest_mark_kod) & rest_gaeld_blb>0 & ktotype_kod==`x'
		bysort pnr indk_aar: egen rest`x'=max(rest`x'_temp) 
		label var rest`x' "Defaults after collapse (Account type=`x')"
		drop rest`x'_temp
	}
	* Calculate interest rate using deposits primo and ultimo in a year and interest paid
	sort pnr kto_nr indk_aar
	gen brug1=1 if 	pnr==pnr[_n-1] /// 
				&	kto_nr==kto_nr[_n-1] ///
				& 	indk_aar-1==indk_aar[_n-1]	
	rename rest_gaeld_blb saldo_ultimo
	gen saldo_primo=saldo_ultimo[_n-1] if brug1==1
	gen saldo_medio=(saldo_primo+saldo_ultimo)/2 if brug1==1
	gen medio=1  if saldo_medio>=1000 ///
				 & 	rnt_blb>0 ///
				 &	saldo_primo>=0 /// 
				 &	saldo_ultimo>=0 ///
				 &  ktotype_kod==56
	gen ultimo=1 if	saldo_ultimo>=1000 ///
				 & 	rnt_blb>0 ///
				 &  ktotype_kod==56
	gen rente_medio =rnt_blb/saldo_medio  if medio ==1
	gen rente_ultimo=rnt_blb/saldo_ultimo if ultimo==1
	drop brug1 medio ultimo
	sort pnr kto_nr indk_aar
	rename indk_aar year
	bysort pnr year: egen max_rente=max(rente_medio) // Highest interest rate on any loan in a given year
	bysort pnr year: egen antal_restance = total(rest_all), missing // Number of defaults and delinquencies
	levelsof ktotype_kod, local(type)
	foreach x in `type' {
		gen 	temp=saldo_ultimo if rest_mark_kod==2 & ktotype_kod==`x'
		bysort pnr year: egen blb_rest`x'=total(temp) 
		label var blb_rest`x' "Amount in default (Account type=`x')"
		drop temp
	}
	drop kto_nr ktotype_kod saldo_ultimo rest_mark_kod rnt_blb restance saldo_primo saldo_medio rente_medio rente_ultimo ejerstatu_kod ejerstatu_kod
	duplicates drop
	duplicates report pnr year
	compress
save "$raw\urte_edit.dta"

use "$raw\urte_efalle.dta", clear // Accessed 2022-03-18
	*Same procedure as for urte above (for accounts of partners)
	drop if ejerstatu_kod=="2" 
	drop if mi(kto_nr) 
	gen restance_temp=(rest_mark_kod==2) if !mi(rest_mark_kod)
	bysort pnr kto_nr indk_aar: egen restance=max(restance_temp)
	drop restance_temp 
	duplicates report pnr kto_nr indk_aar registmp rette_kod rest_gaeld_blb rnt_blb
	sort pnr kto_nr indk_aar registmp rette_kod rest_gaeld_blb rnt_blb op_se_nr ejerstatu_kod ibert_se_nr ktotype_kod rest_mark_kod, stable
	drop bankrgist_nr ibert_se_nr op_se_nr kurs_vaerdi_blb  rest_lb_aar_ant rnt_fod_pct ejerstatu_kod	
	collapse (last) ktotype_kod (sum) rest_gaeld_blb (last) rest_mark_kod (sum) rnt_blb (last) restance, by(pnr kto_nr indk_aar) fast
	duplicates report pnr kto_nr indk_aar
	bysort pnr indk_aar: egen rest_all = max(restance)
	levelsof ktotype_kod, local(type)
	foreach x in `type' {
		gen 	rest`x'_temp=(rest_mark_kod==2) if !mi(rest_mark_kod) & rest_gaeld_blb>0 & ktotype_kod==`x'
		bysort pnr indk_aar: egen rest`x'=max(rest`x'_temp) 
		drop rest`x'_temp
	}
	sort pnr kto_nr indk_aar
	gen brug1=1 if 	pnr==pnr[_n-1] /// 
				&	kto_nr==kto_nr[_n-1] ///
				& 	indk_aar-1==indk_aar[_n-1]	
	rename rest_gaeld_blb saldo_ultimo
	gen saldo_primo=saldo_ultimo[_n-1] if brug1==1
	gen saldo_medio=(saldo_primo+saldo_ultimo)/2 if brug1==1
	gen medio=1  if saldo_medio>=1000 ///
				 & 	rnt_blb>0 ///
				 &	saldo_primo>=0 /// 
				 &	saldo_ultimo>=0 ///
				 &  ktotype_kod==56
	gen ultimo=1 if	saldo_ultimo>=1000 ///
				 & 	rnt_blb>0 ///
				 &  ktotype_kod==56
	gen rente_medio =rnt_blb/saldo_medio  if medio ==1
	gen rente_ultimo=rnt_blb/saldo_ultimo if ultimo==1
	drop brug1 medio ultimo
	sort pnr kto_nr indk_aar
	rename indk_aar year
	bysort pnr year: egen max_rente=max(rente_medio) 
	bysort pnr year: egen antal_restance = total(rest_all), missing
	levelsof ktotype_kod, local(type)
	foreach x in `type' {
		gen 	temp=saldo_ultimo if rest_mark_kod==2 & ktotype_kod==`x'
		bysort pnr year: egen blb_rest`x'=total(temp) 
		drop temp
	}
	
	drop kto_nr ktotype_kod saldo_ultimo rest_mark_kod rnt_blb restance saldo_primo saldo_medio rente_medio rente_ultimo
	duplicates drop
	duplicates report pnr year
	compress
	unab names: _all
	foreach x in `names' {
	    rename `x' `x'_efalle
	}
	rename pnr efalle
	rename year_efalle year
save "$raw\urte_efalle_edit.dta"

use "$raw\irte.dta", clear
	keep if indk_aar<2017
	keep pnr indk_aar
	rename indk_aar year
	duplicates drop
	sort pnr year 
save "$raw\irte_edit.dta"

* MERGE
use "$raw\bef_edit", clear
	merge 1:1 pnr year using "$raw\ind_edit"
	drop _merge 
	merge 1:1 pnr year using "$raw\indh_edit" 
	drop _merge 
	merge 1:1 pnr year using "$raw\idap_edit"
	drop _merge
	merge 1:1 pnr year using "$raw\idan_edit"
	drop _merge
	merge 1:1 pnr year using "$raw\pensform_edit"
	drop _merge
	merge 1:1 pnr year using "$raw\urte_edit"
	drop _merge
	merge 1:1 pnr year using "$raw\udda_edit"
	drop _merge
	merge 1:1 pnr year using "$raw\udg_edit"
	drop _merge
	compress
save "$temp/registerdata"

use "$raw\bef_fam_edit", clear
	merge 1:1 pnr year using "$raw\ind_fam_edit"
	drop _merge 
	merge 1:1 pnr year using "$raw\udda_fam_edit"
	drop _merge
compress
save "$temp/registerdata_fam"
